2007 data included day/month/hour columns rather than datetime

This notebook adds datetime values as the first row and rewrites the excel file


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


/Users/Home/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [20]:
fn = 'rpt.00013424.0000000000000000.20141016.182537113.ERCOT_2007_Hourly_Wind_Output.xls'
df = pd.read_excel(fn, 'numbers')

In [4]:
df.head()


Out[4]:
Month Date Hour ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load 1-hr MW change 1-hr % change Wind Daily Change on Hour % of Daily Wind Change on Hour
0 1 1 0 30428 2790 1074.0 38.494624 3.529644 NaN NaN NaN NaN
1 1 1 1 30133 2790 922.6 33.068100 3.061760 -151.4 -14.096834 NaN NaN
2 1 1 2 29941 2790 849.2 30.437276 2.836245 -73.4 -7.955777 NaN NaN
3 1 1 3 29949 2790 1056.3 37.860215 3.526996 207.1 24.387659 NaN NaN
4 1 1 4 30248 2790 837.1 30.003584 2.767456 -219.2 -20.751680 NaN NaN

In [21]:
df['Year'] = 2007

In [6]:
df.head()


Out[6]:
Month Date Hour ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load 1-hr MW change 1-hr % change Wind Daily Change on Hour % of Daily Wind Change on Hour Year
0 1 1 0 30428 2790 1074.0 38.494624 3.529644 NaN NaN NaN NaN 2007
1 1 1 1 30133 2790 922.6 33.068100 3.061760 -151.4 -14.096834 NaN NaN 2007
2 1 1 2 29941 2790 849.2 30.437276 2.836245 -73.4 -7.955777 NaN NaN 2007
3 1 1 3 29949 2790 1056.3 37.860215 3.526996 207.1 24.387659 NaN NaN 2007
4 1 1 4 30248 2790 837.1 30.003584 2.767456 -219.2 -20.751680 NaN NaN 2007

In [22]:
df.loc[:,'Day'] = df.loc[:,'Date']

In [23]:
df.loc[:,'time-date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']],format='%Y%m%d%h')

In [17]:
df.head()


Out[17]:
Month Date Hour ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load 1-hr MW change 1-hr % change Wind Daily Change on Hour % of Daily Wind Change on Hour Year Day time-date
0 1 1 0 30428 2790 1074.0 38.494624 3.529644 NaN NaN NaN NaN 2007 1 2007-01-01 00:00:00
1 1 1 1 30133 2790 922.6 33.068100 3.061760 -151.4 -14.096834 NaN NaN 2007 1 2007-01-01 01:00:00
2 1 1 2 29941 2790 849.2 30.437276 2.836245 -73.4 -7.955777 NaN NaN 2007 1 2007-01-01 02:00:00
3 1 1 3 29949 2790 1056.3 37.860215 3.526996 207.1 24.387659 NaN NaN 2007 1 2007-01-01 03:00:00
4 1 1 4 30248 2790 837.1 30.003584 2.767456 -219.2 -20.751680 NaN NaN 2007 1 2007-01-01 04:00:00

In [24]:
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-3]
df = df[cols]

In [25]:
df.head()


Out[25]:
time-date Month Date Hour ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load 1-hr MW change 1-hr % change Wind Daily Change on Hour % of Daily Wind Change on Hour
0 2007-01-01 00:00:00 1 1 0 30428 2790 1074.0 38.494624 3.529644 NaN NaN NaN NaN
1 2007-01-01 01:00:00 1 1 1 30133 2790 922.6 33.068100 3.061760 -151.4 -14.096834 NaN NaN
2 2007-01-01 02:00:00 1 1 2 29941 2790 849.2 30.437276 2.836245 -73.4 -7.955777 NaN NaN
3 2007-01-01 03:00:00 1 1 3 29949 2790 1056.3 37.860215 3.526996 207.1 24.387659 NaN NaN
4 2007-01-01 04:00:00 1 1 4 30248 2790 837.1 30.003584 2.767456 -219.2 -20.751680 NaN NaN

In [27]:
writer = pd.ExcelWriter(fn)
df.to_excel(writer,'numbers')
writer.save()

In [ ]: